#!/usr/bin/env python3
#
# This exports the emoji_names.py data set to a CSV file in the same
# format used as input for import_emoji_names_from_csv.  We use this
# as part of a test for the correctness of the import process (one can
# compare the exported CSV file to the original CSV file, and if the
# data round-tripped with no changes, we know everything is working
# correctly).
import argparse
import csv
import os
import re
from typing import Any, Dict, List

import orjson

from emoji_setup_utils import get_emoji_code

# Directory two levels above the one that contains this file.  The default
# input and output paths in main() are built relative to it.
TOOLS_DIR_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Parent directory of TOOLS_DIR_PATH; `node_modules` is looked up under it.
ZULIP_PATH = os.path.dirname(TOOLS_DIR_PATH)
# The `emoji.json` file is the same in all four emoji-datasource packages.
EMOJI_DATA_PATH = os.path.join(ZULIP_PATH, "node_modules", "emoji-datasource-google", "emoji.json")

# Filled in by prepare_sorting_info(): maps an emoji code to a dict with
# the keys "category" and "sort_order" (the latter zero-padded to at
# least three characters).
sorting_info: Dict[str, Any] = {}
# Header row of the exported CSV; main() writes it as the first row.
column_names = [
    "Codepoint",
    "New sorting info",
    "zulip (main)",
    "zulip (alternates)",
    "explanation",
]
# Maps a category name to the digit string that get_sorting_info()
# prepends to it (e.g. "1-Smileys & People").
category_index = {
    "Smileys & People": "1",
    "Animals & Nature": "2",
    "Food & Drink": "3",
    "Activities": "4",
    "Travel & Places": "5",
    "Objects": "6",
    "Symbols": "7",
    "Flags": "8",
    "Skin Tones": "9",
}

# Matches one emoji entry of the input file, for example:
#     '1f600': {'canonical_name': 'grinning', 'aliases': ['grinning_face']},
# and captures three named groups: `emoji_code`, `canonical_name`, and
# `aliases` (the raw, still-quoted text between the square brackets).
name_entry_regex = re.compile(
    r"'(?P<emoji_code>[a-z0-9-]+)': "
    r"{'canonical_name': '(?P<canonical_name>[+-]?[a-z0-9_X-]+)',[\n ]+"
    r"'aliases': \[(?P<aliases>('([+-]?[a-z0-9_, X-]+)'[, ]{0,2})*)\]},"
)
# Matches a comment introduced by four spaces and "# "; the named group
# `explanation_line` captures the text after it, up to the first carriage
# return, newline, or tab.
explanation_regex = re.compile(r"    # (?P<explanation_line>[^\r\n\t]+)")


def prepare_sorting_info() -> None:
    """Populate the module-level `sorting_info` table from `emoji.json`.

    Reads the JSON list stored at EMOJI_DATA_PATH and, for every entry,
    records its category and its sort order (as a string, stripped and
    left-padded with zeros to at least three characters) under the code
    returned by `get_emoji_code`.
    """
    with open(EMOJI_DATA_PATH, "rb") as datasource:
        raw_json = datasource.read()
    entries: List[Dict[str, Any]] = orjson.loads(raw_json)

    for entry in entries:
        code = get_emoji_code(entry)
        order_text = str(entry["sort_order"]).strip()
        record = {
            "category": entry["category"],
            "sort_order": order_text.rjust(3, "0"),
        }
        sorting_info[code] = record


def get_sorting_info(emoji_code: str) -> str:
    """Return the "New sorting info" cell for `emoji_code`.

    The result has the form "<index>-<category> <sort_order>", where the
    index comes from `category_index` and the other two values from the
    `sorting_info` entry for the emoji.  Raises KeyError if the code or
    its category is unknown.
    """
    category_name = sorting_info[emoji_code]["category"]
    labelled_category = "-".join((category_index[category_name], category_name))
    order = sorting_info[emoji_code]["sort_order"]
    return " ".join((labelled_category, order))


def prepare_explanation(explanation_lines: List[str]) -> str:
    """Merge the collected explanation lines into one space-separated string."""
    separator = " "
    merged = separator.join(explanation_lines)
    return merged


def prepare_aliases(captured_aliases: str) -> str:
    """Turn the quoted alias list captured by the regex into plain CSV text.

    The input is split on ", ", single quotes are stripped from both ends
    of every piece, and the pieces are joined back together with ", ".
    """
    quoted_names = captured_aliases.split(", ")
    return ", ".join(name.strip("'") for name in quoted_names)


def main() -> None:
    """Export the entries of `emoji_names.py` to a CSV file.

    Command-line options:
        --input-file   path of the file to read (defaults to
                       `setup/emoji/emoji_names.py` under TOOLS_DIR_PATH).
        --output-file  path of the CSV file to write (defaults to
                       `setup/emoji/emoji_names.csv` under TOOLS_DIR_PATH).

    Every input line matching `name_entry_regex` becomes one CSV row of
    emoji code, sorting info, canonical name, aliases and explanation.
    Comment lines matching `explanation_regex` are accumulated and attached
    to the next entry that follows them.

    Raises:
        KeyError: if an emoji code from the input file has no sorting
            information (propagated from `get_sorting_info`).
        OSError: if the input or output file cannot be opened.
    """
    description = (
        "This script is used for exporting `emoji_names.py` to comma separated file. It "
        "takes the path of output csv file and path to `emoji_names.py` as arguments."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.py"),
        help="Path to the file from which data is to be read.",
    )
    parser.add_argument(
        "--output-file",
        dest="output_file_path",
        metavar="<path>",
        default=os.path.join(TOOLS_DIR_PATH, "setup", "emoji", "emoji_names.csv"),
        help="Path to the output csv file.",
    )

    args = parser.parse_args()
    prepare_sorting_info()
    output_data = [column_names]
    # Comment lines seen since the last emoji entry; they describe the
    # entry that comes next.
    explanation_lines: List[str] = []
    with open(args.input_file_path) as fp:
        for line in fp:
            match = name_entry_regex.search(line)
            if match is not None:
                emoji_code = match.group("emoji_code")
                sort_info = get_sorting_info(emoji_code)
                canonical_name = match.group("canonical_name")
                aliases = prepare_aliases(match.group("aliases"))
                explanation = prepare_explanation(explanation_lines)
                output_data.append(
                    [
                        emoji_code,
                        sort_info,
                        canonical_name,
                        aliases,
                        explanation,
                    ]
                )
                explanation_lines = []
                continue

            match = explanation_regex.search(line)
            if match is not None:
                explanation_line = match.group("explanation_line").strip()
                explanation_lines.append(explanation_line)

    with open(args.output_file_path, "w") as f:
        writer = csv.writer(f, dialect="excel")
        # Writing and truncating must happen while `f` is still open, and
        # on the output handle rather than the (already closed) input file.
        writer.writerows(output_data)
        # The CSV file exported by Google Sheets doesn't have a newline
        # character in the end. So we also strip the last newline character
        # so that round-trip conversion test passes.
        # NOTE(review): the "excel" dialect terminates rows with "\r\n",
        # which can be longer than os.linesep -- confirm that the bytes left
        # at the end of the file are what the round-trip test expects.
        line_sep_len = len(os.linesep)
        f.truncate(f.tell() - line_sep_len)


if __name__ == "__main__":
    main()
